from pdf2docx import Converter
from docx import Document


def pdf_to_docx_advanced(
    pdf_path,
    docx_path,
    *,
    title="Advanced Converted Document",
    author="Your Name",
    subject="PDF to DOCX Advanced Conversion",
):
    """Convert a PDF file to DOCX and stamp core properties on the result.

    The conversion is performed with ``pdf2docx.Converter``; afterwards the
    generated file is reopened with ``docx.Document`` so that its core
    properties (title, author, subject) can be set, and it is saved back to
    the same path.

    Args:
        pdf_path: Path of the source PDF, passed to ``Converter``.
        docx_path: Path of the DOCX file to write. It is written once by the
            converter and overwritten once more after the properties are set.
        title: Value stored in the document's ``title`` core property.
        author: Value stored in the document's ``author`` core property.
        subject: Value stored in the document's ``subject`` core property.

    Returns:
        None. The output is the file written at ``docx_path``.

    Raises:
        Whatever ``Converter``, ``Document`` or ``Document.save`` raise is
        propagated unchanged; the converter is closed before it propagates.
    """
    cv = Converter(pdf_path)
    try:
        # Disabled tuning hooks kept from the original for reference
        # (NOTE(review): not verified against the pdf2docx API):
        #   force table detection:
        #     cv.bind(make_table=lambda x: True)
        #   choose the image output folder and format:
        #     cv.bind(img_folder='images', img_ext='png')

        # start=0 / end=None are the page-range arguments; presumably this
        # selects every page -- confirm against the pdf2docx documentation.
        cv.convert(docx_path, start=0, end=None)
    finally:
        # Always release the converter, even when the conversion fails.
        cv.close()

    # Reopen the freshly written file to set its metadata.
    doc = Document(docx_path)
    props = doc.core_properties
    props.title = title
    props.author = author
    props.subject = subject

    # Overwrite the converted file with the version carrying the metadata.
    doc.save(docx_path)


if __name__ == '__main__':
    # Script entry point: convert one fixed PDF into a DOCX file whose
    # (relative) output path is resolved against the current working directory.
    source_pdf = '/Users/lujiewen/asiainfo-sec/fujian-data/YDT 3799-2020 电信网和互联网网络安全防护定级备案实施指南.pdf'
    target_docx = 'YDT 3799-2020 电信网和互联网网络安全防护定级备案实施指南.docx'
    pdf_to_docx_advanced(source_pdf, target_docx)